library(FuzzyNumbers)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(rworldmap)

Análisis exploratorio de los datos

data = read.csv('Corruption Perceptions Index - Dataset.csv', sep = ';')
data$african_union = as.logical(data$african_union)
data$arab_states = as.logical(data$arab_states)
data$BRICS = as.logical(data$BRICS)
data$EU = as.logical(data$EU)
data$G20 = as.logical(data$G20)
data$OECD = as.logical(data$OECD)

head(data)


str(data)
## 'data.frame':    180 obs. of  42 variables:
##  $ country                                            : Factor w/ 180 levels "Afghanistan",..: 117 44 55 121 156 143 155 29 97 116 ...
##  $ ISO3                                               : Factor w/ 180 levels "AFG","AGO","ALB",..: 124 45 54 122 28 142 153 27 99 121 ...
##  $ region                                             : Factor w/ 6 levels "AME","AP","ECA",..: 2 6 6 6 6 2 6 1 6 6 ...
##  $ african_union                                      : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ arab_states                                        : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ BRICS                                              : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ EU                                                 : logi  FALSE TRUE TRUE FALSE FALSE FALSE ...
##  $ G20                                                : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ OECD                                               : logi  TRUE TRUE TRUE TRUE TRUE FALSE ...
##  $ CPI_score_2017                                     : int  89 88 85 85 85 84 84 82 82 82 ...
##  $ standard_error_2017                                : num  2.4 2.75 2.84 1.83 1.71 2.26 2.27 1.49 2.08 2.23 ...
##  $ lower_CI_2017                                      : int  85 83 80 82 82 80 80 80 79 78 ...
##  $ upper_CI_2017                                      : int  93 93 90 88 88 88 88 84 85 86 ...
##  $ number_sources_2017                                : int  8 8 8 8 7 9 8 8 6 8 ...
##  $ CPI_score_2012                                     : int  90 90 90 85 86 87 88 84 80 84 ...
##  $ standard_error_2012                                : num  2.2 2 3 1.6 2.6 2.1 1.9 2.2 2.8 2 ...
##  $ number_sources_2012                                : int  7 7 7 7 6 9 7 7 6 7 ...
##  $ CPI_score_2013                                     : int  91 91 89 86 85 86 89 81 80 83 ...
##  $ standard_error_2013                                : num  2.3 2.2 1.7 2.3 2.5 2.3 2.3 2.4 2.9 2 ...
##  $ number_sources_2013                                : int  7 7 7 7 6 9 7 7 6 7 ...
##  $ CPI_score_2014                                     : int  91 92 89 86 86 84 87 81 82 83 ...
##  $ standard_error_2014                                : num  2.28 2.04 2.05 2.38 2.61 ...
##  $ number_sources_2014                                : int  7 7 7 7 6 8 7 7 6 7 ...
##  $ CPI_score_2015                                     : int  91 91 90 88 86 85 89 83 85 84 ...
##  $ standard_error_2015                                : num  2.32 2.16 1.77 2.24 2.55 ...
##  $ number_sources_2015                                : int  7 7 7 7 6 8 7 7 5 7 ...
##  $ CPI_score_2016                                     : int  90 90 89 85 86 84 88 82 81 83 ...
##  $ standard_error_2016                                : num  2.56 2.46 1.46 1.85 1.57 2.35 1.33 2.03 1.96 2.32 ...
##  $ number_sources_2016                                : int  7 7 7 7 6 8 7 7 6 7 ...
##  $ world_bank_CPIA                                    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ world_economic_forum_EOS                           : int  92 80 92 86 85 90 74 80 86 84 ...
##  $ global_insight_country_risk_ratings                : int  83 83 83 83 83 83 83 83 83 83 ...
##  $ bertelsmann_foundation_transformation_index        : int  0 0 0 0 0 73 0 0 0 0 ...
##  $ african_development_bank_CPIA                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IMD_world_competitiveness_yearbook                 : int  93 99 92 85 89 90 84 86 84 89 ...
##  $ bertelsmann_foundation_sustainable_governance_index: int  97 97 88 79 88 0 88 79 79 70 ...
##  $ world_justice_project_rule_of_law_index            : int  82 88 86 86 0 85 86 80 0 80 ...
##  $ PRS_international_country_risk_guide               : int  93 93 93 93 85 79 93 85 85 85 ...
##  $ varieties_of_democracy_project                     : int  77 77 75 77 77 77 77 77 0 77 ...
##  $ economist_intelligence_unit_country_ratings        : int  90 90 72 90 90 90 90 90 72 90 ...
##  $ freedom_house_nations_in_transit_ratings           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ PERC_asia_risk_guide                               : int  0 0 0 0 0 92 0 0 0 0 ...


cols = c('country', 
         'CPI_score_2012',
         'CPI_score_2013',
         'CPI_score_2014', 
         'CPI_score_2015', 
         'CPI_score_2016',
         'CPI_score_2017')
data_cpi = data[cols]
colnames(data_cpi) = c('country', '2012', '2013', '2014', '2015', '2016', '2017')

data_cpi_top_20 = head(data_cpi[sort(data_cpi$`2017`, decreasing = TRUE, index.return=TRUE)$ix, ], 20)
data_cpi_top_20_melt = melt(data_cpi_top_20, id.vars='country')
         
ggplot(data_cpi_top_20_melt, aes(x=variable, y=value, color=country)) + 
  geom_point() +
  geom_line(aes(group=country)) +
  xlab("Year") + ylab("CPI") +
  scale_color_discrete(name="Countries",
                       breaks=data_cpi_top_20$country) +
  ggtitle("20 countries with higher CPI in 2017") +
  theme(plot.title = element_text(hjust = 0.5))


data_cpi_top_20 = head(data_cpi[sort(data_cpi$`2017`, decreasing = FALSE, index.return=TRUE)$ix, ], 20)
data_cpi_top_20_melt = melt(data_cpi_top_20, id.vars='country')
         
ggplot(data_cpi_top_20_melt, aes(x=variable, y=value, color=country)) + 
  geom_point() +
  geom_line(aes(group=country)) +
  xlab("Year") + ylab("CPI") +
  scale_color_discrete(name="Countries",
                       breaks=data_cpi_top_20$country) +
  ggtitle("20 countries with lower CPI in 2017") +
  theme(plot.title = element_text(hjust = 0.5))


CPI_score_cols = c('CPI_score_2012',
                   'CPI_score_2013',
                   'CPI_score_2014', 
                   'CPI_score_2015', 
                   'CPI_score_2016',
                   'CPI_score_2017')
number_sources_cols = c('number_sources_2012',
                        'number_sources_2013',
                        'number_sources_2014', 
                        'number_sources_2015', 
                        'number_sources_2016',
                        'number_sources_2017')
cols = c(CPI_score_cols, number_sources_cols)

# Calculate mean por each region
ame = sapply(data[data$region == 'AME', cols], mean)
we_eu = sapply(data[data$region == 'WE/EU', cols], mean)
ap = sapply(data[data$region == 'AP', cols], mean)
eca = sapply(data[data$region == 'ECA', cols], mean)
mena = sapply(data[data$region == 'MENA', cols], mean)
ssa = sapply(data[data$region == 'SSA', cols], mean)

regions = rbind(ame, we_eu, ap, eca, mena, ssa)
rownames(regions) = c('AME', 'WE/EU', 'AP', 'ECA', 'MENA', 'SSA')

# Melt dataframes by region
cpi_score_df = regions[, CPI_score_cols]
colnames(cpi_score_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
cpi_score_df_melt = melt(cpi_score_df, id.vars=rownames)

number_sources_df = regions[, number_sources_cols]
colnames(number_sources_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
number_sources_df_melt = melt(number_sources_df, id.vars=rownames)
         
# Plot comparative between CPI and number of sources
g1 = ggplot(cpi_score_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) + 
  geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
  xlab("Regions") + ylab("Mean CPI") +
  ggtitle("Mean CPI by region and year") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.title=element_blank())
         
g2 = ggplot(number_sources_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) + 
  geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
  xlab("Regions") + ylab("Mean number of sources") +
  ggtitle("Mean number of sources by region and year") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.title=element_blank())

grid.arrange(g1, g2, nrow = 1)


# Calculate mean por each economic group
african_union = sapply(data[data$african_union == TRUE, cols], mean)
arab_states = sapply(data[data$arab_states == TRUE, cols], mean)
BRICS = sapply(data[data$BRICS == TRUE, cols], mean)
EU = sapply(data[data$EU == TRUE, cols], mean)
G20 = sapply(data[data$G20 == TRUE, cols], mean)
OECD = sapply(data[data$OECD == TRUE, cols], mean)

economic_groups = rbind(african_union, arab_states, BRICS, EU, G20, OECD)
rownames(economic_groups) = c('African Union', 'Arab States', 'BRICS', 'EU', 'G20', 'OECD')

# Melt dataframes by economic group
cpi_score_df = economic_groups[, CPI_score_cols]
colnames(cpi_score_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
cpi_score_df_melt = melt(cpi_score_df, id.vars=rownames)

number_sources_df = economic_groups[, number_sources_cols]
colnames(number_sources_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
number_sources_df_melt = melt(number_sources_df, id.vars=rownames)
         
# Plot comparative between CPI and number of sources
g1 = ggplot(cpi_score_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) + 
  geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
  xlab("Economic groups") + ylab("Mean CPI") +
  ggtitle("Mean CPI by economic group and year") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.title=element_blank())
         
g2 = ggplot(number_sources_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) + 
  geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
  xlab("Economic groups") + ylab("Mean number of sources") +
  ggtitle("Mean number of sources by economic group and year") +
  theme(plot.title = element_text(hjust = 0.5),
        legend.title=element_blank())

grid.arrange(g1, g2, nrow = 1)


Fuzzificación del CPI de 2017

aux1 = data$lower_CI_2017 - 
  (2 * (data$upper_CI_2017 - data$lower_CI_2017) * (1 - (data$number_sources_2017 / 13)))
aux2 = data$lower_CI_2017
aux3 = data$upper_CI_2017
aux4 = data$upper_CI_2017 + 
  (2 * (data$upper_CI_2017 - data$lower_CI_2017) * (data$number_sources_2017 / 13))

colors = rainbow(dim(data)[1])
fuzzy_CPI = c()
for(i in 1:nrow(data)) {
  fuzzy = PiecewiseLinearFuzzyNumber(aux1[i], aux2[i], aux3[i], aux4[i])
  fuzzy_CPI = c(fuzzy_CPI, fuzzy)
  
  add = TRUE
  if (i == 1) {
    add = FALSE
  }
  
  plot(fuzzy, add = add, col = colors[i], xlim = c(-35, 110))
}


Mediana borrosa por región

fuzzy_quantile = function(fuzzy_number_list, deltas, p) {
  # Calculate delta-cuts of all fuzzy numbers
  alphacut_list = c()
  for(d in 1:length(deltas)) {
    alphacut = c()
    for(i in 1:length(fuzzy_number_list)) {
      alphacut = c(alphacut, alphacut(fuzzy_number_list[[i]], deltas[d]))
    }
    alphacut_list[[d]] = sort(alphacut)
  }
  
  freq_dist_L = matrix(rep(0, len = length(deltas) * length(fuzzy_number_list) * 2), 
                       nrow = length(deltas))
  freq_dist_U = matrix(rep(0, len = length(deltas) * length(fuzzy_number_list) * 2), 
                       nrow = length(deltas))
  
  # Iterate over delta-cuts
  for (i in 1:(length(fuzzy_number_list) * 2)) { 
    # Iterate over the fuzzy numbers
    for (fuzzy_num in 1:length(fuzzy_number_list)) { 
      alphacut = alphacut(fuzzy_number_list[[fuzzy_num]], deltas)
      
      # Iterate over deltas
      for (d in 1:length(deltas)) { 
        # Intersec
        if(alphacut[d, 'L'] <= alphacut_list[[d]][i]) { 
          freq_dist_U[d, i] = freq_dist_U[d, i] + 1
        }
        
        # Subset
        if(alphacut[d, 'U'] <= alphacut_list[[d]][i]) { 
          freq_dist_L[d, i] = freq_dist_L[d, i] + 1
        }
      }
    }
  }
  
  freq_dist_L = freq_dist_L / length(fuzzy_number_list)
  freq_dist_U = freq_dist_U / length(fuzzy_number_list)
  
  m = matrix(rep(0, len = 2 * length(deltas)), nrow = length(deltas))
  fuzzy_quantile = data.frame(m, row.names = deltas)
  colnames(fuzzy_quantile) = c('L', 'U')
  
  # Upper
  for(r in 1:nrow(freq_dist_L)) {
    for(c in 1:ncol(freq_dist_L)) {
      if(freq_dist_L[r, c] >= p) {
        fuzzy_quantile[r, 'U'] = alphacut_list[[r]][c]
        break
      }
    }
  }
  
  # Lower
  for(r in 1:nrow(freq_dist_U)) {
    for(c in 1:ncol(freq_dist_U)) {
      if(freq_dist_U[r, c] >= p) {
        fuzzy_quantile[r, 'L'] = alphacut_list[[r]][c]
        break
      }
    }
  }
  
  return(fuzzy_quantile)
}


CPI_score_cols = c('CPI_score_2017')

ame = median(data[data$region == 'AME', 'CPI_score_2017'])
we_eu = median(data[data$region == 'WE/EU', 'CPI_score_2017'])
ap = median(data[data$region == 'AP', 'CPI_score_2017'])
eca = median(data[data$region == 'ECA', 'CPI_score_2017'])
mena = median(data[data$region == 'MENA', 'CPI_score_2017'])
ssa = median(data[data$region == 'SSA', 'CPI_score_2017'])

crisp_median_regions = rbind(ame, we_eu, ap, eca, mena, ssa)
rownames(crisp_median_regions) = c('AME', 'WE/EU', 'AP', 'ECA', 'MENA', 'SSA')
crisp_median_regions
##       [,1]
## AME   38.5
## WE/EU 63.0
## AP    38.0
## ECA   35.0
## MENA  37.5
## SSA   31.0


# Get index from the original data for each region
ame = as.integer(rownames(data[data$region == 'AME', ]))
we_eu = as.integer(rownames(data[data$region == 'WE/EU', ]))
ap = as.integer(rownames(data[data$region == 'AP', ]))
eca = as.integer(rownames(data[data$region == 'ECA', ]))
mena = as.integer(rownames(data[data$region == 'MENA', ]))
ssa = as.integer(rownames(data[data$region == 'SSA', ]))

# Calculate median for each region
deltas = c(0, 0.25, 0.5, 0.75, 1)
p = 0.5
median_fuzzy_CPI_ame = fuzzy_quantile(fuzzy_CPI[ame], deltas, p)
median_fuzzy_CPI_we_eu = fuzzy_quantile(fuzzy_CPI[we_eu], deltas, p)
median_fuzzy_CPI_ap = fuzzy_quantile(fuzzy_CPI[ap], deltas, p)
median_fuzzy_CPI_eca = fuzzy_quantile(fuzzy_CPI[eca], deltas, p)
median_fuzzy_CPI_mena = fuzzy_quantile(fuzzy_CPI[mena], deltas, p)
median_fuzzy_CPI_ssa = fuzzy_quantile(fuzzy_CPI[ssa], deltas, p)

median_fuzzy_regions = list(median_fuzzy_CPI_ame, median_fuzzy_CPI_we_eu, median_fuzzy_CPI_ap, 
                         median_fuzzy_CPI_eca, median_fuzzy_CPI_mena, median_fuzzy_CPI_ssa)
regions = c('AME', 'WE/EU', 'AP', 'ECA', 'MENA', 'SSA')
crisp_unem_women_weight = c()
legend_text = c()
colors = rainbow(length(median_fuzzy_regions))

# Plot median of the fuzzy numbers with the respective crisp number
for (i in 1:length(median_fuzzy_regions)) {
  add = TRUE
  if (i == 1) {
    add = FALSE
  }
  fuzzy = median_fuzzy_regions[[i]]
  fuzzy_median = PiecewiseLinearFuzzyNumber(fuzzy[1, "L"],
                                            fuzzy[length(deltas), "L"],
                                            fuzzy[length(deltas), "U"],
                                            fuzzy[1, "U"],
                                            knot.n = length(deltas),
                                            knot.alpha = sort(deltas),
                                            knot.left = sort(fuzzy[[1]]),
                                            knot.right = sort(fuzzy[[2]]))
  
  plot(fuzzy_median, add = add, col = colors[i], xlim = c(10, 90))
  points(crisp_median_regions[i, ], 1, col = colors[i], pch = 8)
  legend_text = c(legend_text, regions[i])
}
legend('topright', legend=legend_text, lty=1, col=colors)


Map comparativo del CPI crisp con el fuzzificado

fuzzy_CPI_score_2017 = sapply(fuzzy_CPI, expectedValue)
data['fuzzy_CPI_score_2017'] = round(fuzzy_CPI_score_2017)
data[c('country', 'CPI_score_2017', 'fuzzy_CPI_score_2017')]


par(mfrow = c(2, 1), mai = c(0.1, 0.2, 0.2, 0.2))

n = joinCountryData2Map(data, joinCode="ISO3", nameJoinColumn="ISO3")

# Crisp CPI map
map_params = mapCountryData(n, 
                            nameColumnToPlot="CPI_score_2017", 
                            mapTitle="Crisp Corruption Perception Index 2017",
                            missingCountryCol="snow2",
                            colourPalette=c('darkred', 'red', 'yellow'),
                            borderCol='white',
                            addLegend=FALSE,
                            numCats=10,
                            catMethod=seq(from = 0, to = 100, by = 5))

# Fuzzy CPI map
map_params = mapCountryData(n, 
                            nameColumnToPlot="fuzzy_CPI_score_2017", 
                            mapTitle="Fuzzy Corruption Perception Index 2017",
                            missingCountryCol="snow2",
                            colourPalette=c('darkred', 'red', 'yellow'),
                            borderCol='white',
                            addLegend=FALSE,
                            numCats=10,
                            catMethod=seq(from = 0, to = 100, by = 5))
# Create legend
do.call(addMapLegend,
        c(map_params,
          legendLabels="all",
          legendWidth=0.5,
          legendIntervals="data",
          legendMar=5))